Notebook Summary

This script plots individual antibody trajectories in the Haitian cohort over successive measurement visits and also by age. Children were measured up to 9 times, but the majority were measured 5 times, so figures by measurement round include up to 6 visits.

Script preamble

#-----------------------------
# preamble
#-----------------------------
library(here)
## here() starts at /Users/benarnold/enterics-seroepi
here()
## [1] "/Users/benarnold/enterics-seroepi"
# load packages
library(tidyverse)
## ── Attaching packages ──────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0     ✔ purrr   0.2.5
## ✔ tibble  2.0.1     ✔ dplyr   0.7.8
## ✔ tidyr   0.8.0     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.5.2
## ── Conflicts ─────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(kableExtra)
library(ROCR)
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# set up for parallel computing
# configure for a laptop (use only 3 cores)
library(foreach)
## 
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
library(doParallel)
## Loading required package: iterators
## Loading required package: parallel
registerDoParallel(cores=3)

# bright color blind palette:  https://personal.sron.nl/~pault/ 
cblack <- "#000004FF"
cblue <- "#3366AA"
cteal <- "#11AA99"
cgreen <- "#66AA55"
cchartr <- "#CCCC55"
cmagent <- "#992288"
cred <- "#EE3333"
corange <- "#EEA722"
cyellow <- "#FFEE33"
cgrey <- "#777777"
#-----------------------------
# load the formatted data
# created with 
# asembo-enteric-ab-data-format.Rmd
#-----------------------------
dl <- readRDS(here("data","haiti_analysis2.rds"))

# list the enteric antigens and formatted labels for them
mbavars <- c("vsp3","vsp5","cp17","cp23","leca","etec","salb","sald","norogi","norogii")

mbalabs <- c("Giardia VSP-3","Giardia VSP-5","Cryptosporidium Cp17","Cryptosporidium Cp23","E. histolytica LecA","Salmonella LPS group B","Salmonella LPS group D","ETEC LT β subunit","Norovirus GI.4","Norovirus GII.4.NO")

Identify incident changes

Among children, identify those that changed status between enrollment and follow-up. Those who changed from negative to positive are seroconverters (seroi below), and those who changed from positive to negative are seroreverters (seror below).

#-----------------------------
# identify seropositive measures
# hierarchy of information for 
# cutoffs:
# 1. ROC
# 2. mixture model based
# 3. estimated among presumed unexposed
#
# store the cutoff value used
# for figures
#-----------------------------
dl <- dl %>%
  mutate(seropos=ifelse(!is.na(posroc),posroc,posmix),
         serocut=ifelse(!is.na(posroc),roccut,mixcut),
         serocut_desc=ifelse(!is.na(posroc),"ROC","Mixture Model")) %>%
  mutate(seropos=ifelse(!is.na(seropos),seropos,posunex),
         serocut_desc=ifelse(!is.na(serocut),serocut_desc,"Unexp dist"),
         serocut=ifelse(!is.na(serocut),serocut,unexpcut)) 


#-----------------------------
# identify incident 
# seroconversions and reversions
#-----------------------------

# group the data by child and
# use lags to identify
# time in years between measurements,
# sero-conversions + sero-reversions 
# between measurements
# set the first measurement to 
# missing for the incidence indicators
# (using the is.na(age_diff) to identify them)
dl2 <- dl %>%
  group_by(antigen,id) %>% 
  arrange(antigen,id,sampleid) %>%
  mutate(age_min  = min(age),
         age_diff = age - lag(age),
         
         logmfi_lag  = lag(logmfi),
         logmfi_lead = lead(logmfi),
         logmfi_dlag  = logmfi - logmfi_lag,
         logmfi_dlead = logmfi_lead - logmfi,
         
         # incident seroconversions and reversions
         # including cumulative numbers
         seropos_lag  = lag(seropos),
         seroi = ifelse(seropos==1 & seropos_lag==0,1,0),
         seroi = ifelse(is.na(age_diff),NA,seroi),
         seroin = cumsum(ifelse(is.na(seroi),0,seroi)),
         seroin = ifelse(seroi==1,seroin,0),
         seror = ifelse(seropos==0 & seropos_lag==1,1,0),
         seror = ifelse(is.na(age_diff),NA,seror),
         serorn = cumsum(ifelse(is.na(seror),0,seror)),
         serorn = ifelse(seror==1,serorn,0)
         ) %>%
  select(
         id,sampleid,
         starts_with("age"),
         pathogen,antigen,antigenf,
         mfi,logmfi,logmfi_dlag,logmfi_dlead,
         roccut,mixcut,serocut,
         posroc,posmix,seropos,seroi,seroin,seror,serorn,
         -logmfi_lag,-logmfi_lead,-seropos_lag)

# incident seroconversions based on a 4-fold increase in MFI
# with a second measure above the seropositivity cutoff
# incident seroreversions based on a 4-fold decrease in MFI
# with the first measure above the seropositivity cutoff
dl2 <- dl2 %>%
  mutate(seroi4fold = ifelse(logmfi_dlag>log10(4) & logmfi>serocut,1,0),
         seroin4fold = cumsum(ifelse(is.na(seroi4fold),0,seroi4fold)),
         seroin4fold = ifelse(seroi4fold==1,seroin4fold,0),
         
         seror4fold=ifelse(logmfi_dlag< -log10(4) & lag(logmfi)>serocut,1,0),
         serorn4fold = cumsum(ifelse(is.na(seror4fold),0,seror4fold)),
         serorn4fold = ifelse(seror4fold==1,serorn4fold,0)
         )

Create labels for different seroconversion patterns

dl2pp <- dl2 %>%
  arrange(antigen,id,sampleid) %>%
  group_by(antigen,id) %>%
  mutate(measnum=row_number()) %>%
  mutate(
    logmfi_chng = ifelse(logmfi_dlag>0,"Increasing","Decreasing"),
    logmfi_chng = ifelse(is.na(logmfi_chng) & logmfi_dlead>0,"Increasing",logmfi_chng),
    logmfi_chng = ifelse(is.na(logmfi_chng) & logmfi_dlead<0,"Decreasing",logmfi_chng),
    logmfi_chng = factor(logmfi_chng,levels=c("Increasing","Decreasing")),
    
    sero_chng = ifelse(lead(seroi4fold)==1,">4-fold increase to above cutoff","<4-fold change"),
    sero_chng = ifelse(lead(seror4fold)==1,">4-fold decrease from above cutoff",sero_chng),
    sero_chng = ifelse(is.na(sero_chng),lead(sero_chng),sero_chng),
    sero_chng = ifelse(is.na(sero_chng),lag(sero_chng),sero_chng),
    sero_chng = factor(sero_chng,levels=c(">4-fold increase to above cutoff",">4-fold decrease from above cutoff","<4-fold change")),
    
    sero_comp = ifelse(lead(seroi)==1 & lead(seroi4fold==1),">4-fold increase, across cutoff","<4-fold change"),
    sero_comp = ifelse(lead(seroi)==0 & lead(seroi4fold==1),">4-fold increase, above cutoff",sero_comp),
    sero_comp = ifelse(lead(seror4fold)==1,">4-fold decrease",sero_comp),
    sero_comp = factor(sero_comp,levels=c(">4-fold increase, across cutoff",">4-fold decrease",">4-fold increase, above cutoff","<4-fold change")),
    
    everseroi = max(seroi,na.rm=TRUE)
    
  ) %>%
  mutate(antigenf=factor(antigenf,levels=mbalabs)) # re-level the factor to put Salmonella on same row

# custom log labels
log10labs <- c( 
  expression(10^0),
  expression(10^1),
  expression(10^2),
  expression(10^3),
  expression(10^4)
)

Longitudinal child trajectories

# Individual trajectories by age
indivp_age <- ggplot(filter(dl2pp,!is.na(sero_chng)), aes(x=age, y=logmfi, group=factor(id),color=sero_chng) ) +
    facet_wrap(~antigenf,nrow=6,ncol=2)+
    geom_line(alpha=0.3) +
    geom_hline(aes(yintercept=serocut),linetype="dashed") +
    scale_color_manual(values=c(corange,cteal,"gray70"),
                       guide=guide_legend(title="MFI change:", 
                                          override.aes = list(alpha=1),
                                          ncol=2,nrow=2
                                          ))+
    xlab("Age in years") +
    ylab("Luminex Response (MFI-bg)") +
    coord_cartesian(ylim=c(0, 4.5),xlim=c(0,11))+
    scale_y_continuous(breaks=0:4,labels=log10labs) +
    scale_x_continuous(breaks=0:11) +
    theme_minimal() +
    theme(
      legend.position="top",
      panel.grid.minor=element_blank(),
      panel.grid.major.y=element_blank(),
      axis.ticks.y=element_line(color="gray40")
    )
  
indivp_age

# Individual trajectories by age, comparison of classifications
indivp_age <- ggplot(filter(dl2pp,!is.na(sero_comp)), aes(x=age, y=logmfi, group=factor(id),color=sero_comp) ) +
    facet_wrap(~antigenf,nrow=6,ncol=2)+
    geom_line(alpha=0.3) +
    geom_hline(aes(yintercept=serocut),linetype="dashed") +
    scale_color_manual(values=c(corange,cteal,cmagent,"gray70"),
                       guide=guide_legend(title="MFI change:",
                                          override.aes = list(alpha=1),
                                          nrow=2,ncol=2
                                          ))+
    xlab("Age in years") +
    ylab("Luminex Response (MFI-bg)") +
    coord_cartesian(ylim=c(0, 4.5),xlim=c(0,11))+
    scale_y_continuous(breaks=0:4,labels=log10labs) +
    scale_x_continuous(breaks=0:11) +
    theme_minimal() +
    theme(
      legend.position="top",
      panel.grid.minor=element_blank(),
      panel.grid.major.y=element_blank(),
      axis.ticks.y=element_line(color="gray40")
    )
  
indivp_age

# Individual trajectories by measurement (1 to 6)
# since only 29 children were measured 7+ times
table(dl2pp$measnum[dl2pp$antigen=="vsp3"])
## 
##   1   2   3   4   5   6   7   8   9 
## 142 142 140 131 111  66  29   9   1
# summarize age distribution among measurements 1-6
summary(dl2pp$age[dl2pp$antigen=="vsp3" & dl2pp$measnum<=6])
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##  0.00274  2.54097  4.30000  4.42293  5.98542 11.54167
indivp_meas <- ggplot(filter(dl2pp,!is.na(sero_chng) & measnum<=6), aes(x=measnum, y=logmfi, group=factor(id),color=sero_chng) ) +
    facet_wrap(~antigenf,nrow=6,ncol=2)+
    geom_line(alpha=0.3) +
    geom_hline(aes(yintercept=serocut),linetype="dashed") +
    scale_color_manual(values=c(corange,cteal,"gray70"),
                       guide=guide_legend(title="MFI change:",
                                          override.aes = list(alpha=1),
                                          nrow=2,ncol=2
                                          ))+
    xlab("Measurement number") +
    ylab("Luminex Response (MFI-bg)") +
    coord_cartesian(ylim=c(0, 4.5),xlim=c(1,6))+
    scale_y_continuous(breaks=0:4,labels=log10labs) +
    scale_x_continuous(breaks=1:6) +
    theme_minimal(base_size=16) +
    theme(
      legend.position="top",
      panel.grid.minor=element_blank(),
      panel.grid.major.y=element_blank(),
      axis.ticks.y=element_line(color="gray40")
    )
  
indivp_meas

Figure 4 - supplement 1, Individual trajectories by measurement round

# Individual trajectories by measurement (1 to 6)
# since only 29 children were measured 7+ times
# compare seroconversion classification
indivp_meas2 <- ggplot(filter(dl2pp,!is.na(sero_comp) & measnum<=6), aes(x=measnum, y=logmfi, group=factor(id),color=sero_comp) ) +
    facet_wrap(~antigenf,nrow=6,ncol=2)+
    geom_line(alpha=0.3) +
    geom_hline(aes(yintercept=serocut),linetype="dashed") +
    scale_color_manual(values=c(corange,cteal,cmagent,"gray70"),
                       guide=guide_legend(title="MFI change:",
                                          override.aes = list(alpha=1),
                                          nrow=2,ncol=2
                                          ))+
    xlab("Measurement number") +
    ylab("Luminex Response (MFI-bg)") +
    coord_cartesian(ylim=c(0, 4.5),xlim=c(1,6))+
    scale_y_continuous(breaks=0:4,labels=log10labs) +
    scale_x_continuous(breaks=1:6) +
    theme_minimal() +
    theme(
      legend.position="top",
      panel.grid.minor=element_blank(),
      panel.grid.major.y=element_blank(),
      axis.ticks.y=element_line(color="gray40")
    )
  
indivp_meas2

# save PDF and TIFF versions
ggsave(filename=here("figs","Fig4sup1-haiti-indiv-ab-trajectories-meas.pdf"),plot = indivp_meas2, device=cairo_pdf, width=6,height=12)
ggsave(filename=here("figs","Fig4sup1-haiti-indiv-ab-trajectories-meas.TIFF"),plot = indivp_meas2, device='tiff', width=6,height=12)
dl2plot <- dl2 %>% group_by(id)
dl2plot$sid <- group_indices(dl2plot)
dl2plot$sid <- factor(dl2plot$sid)

# color by MFI increases/decreases or seroconversion/reversion statsus
dl2plot <- dl2plot %>%
  mutate(
    logmfi_chng  = ifelse(logmfi_dlag>0,"Increasing","Decreasing"),
    logmfi_chng = ifelse(is.na(logmfi_chng) & logmfi_dlead>0,"Increasing",logmfi_chng),
    logmfi_chng = ifelse(is.na(logmfi_chng) & logmfi_dlead<0,"Decreasing",logmfi_chng),
    logmfi_chng = factor(logmfi_chng,levels=c("Increasing","Decreasing")),
    
    sero_chng = ifelse(lead(seroi)==1,"Seroconversion","No change"),
    sero_chng = ifelse(lead(seror)==1,"Seroreversion",sero_chng),
    sero_chng = ifelse(is.na(sero_chng),lead(sero_chng),sero_chng),
    sero_chng = factor(sero_chng,levels=c("Seroconversion","Seroreversion","No change"))
    
  )


# custom color blind color palette is in the preamble chunck
# pcols <- c(cred,corange,cgreen,cteal,cblue,cmagent)
pcols <- c(corange,cred,cmagent,cblue,cteal,cgreen)

# custom log labels
log10labs <- c( 
  expression(10^0),
  expression(10^1),
  expression(10^2),
  expression(10^3),
  expression(10^4)
)

# plot all trajectories, colored (doesn't look that great)
# p <- ggplot(dl2plot, aes(x=age, y=logmfi, group=factor(id),color=sero_chng) ) +
#     facet_wrap(~antigenf,nrow=6,ncol=2)+
#     geom_line(alpha=0.1) +
#     guides(colour=FALSE) +
#     guides(alpha=FALSE) +
#     # geom_line(data=filter(dl2plot,sid==iid),aes(x=age,y=logmfi),size=1.2) +
#     scale_color_manual(values=c(corange,cteal,cgrey))+
#     xlab("Child Age (Years)") +
#     ylab("Luminex Response (MFI-bg)") +
#     coord_cartesian(ylim=c(0, 4.5))+
#     scale_y_continuous(breaks=0:4,labels=log10labs) +
#     scale_x_continuous(limits=c(0,10),breaks=seq(0,10,by=2)) +
#     theme_minimal() +
#     theme(
#       # strip.text = element_text(size=16),
#       # axis.text.x=element_text(size=12),
#       # axis.text.y=element_text(size=12),
#       # axis.title.x=element_text(size=14),
#       # axis.title.y=element_text(size=14),
#       legend.position="none"
#     )
# 
# p

# highlight a handfull of example individual trajectories
for(iid in c(20,24,30,34,35,38,43,47,72,76,77,119,135)) {
  p <- ggplot(dl2plot, aes(x=age, y=logmfi, group=factor(id)) ) +
    facet_wrap(~antigenf,nrow=6,ncol=2)+
    geom_line(alpha=0.1,color=cgrey) +
    guides(colour=FALSE) +
    guides(alpha=FALSE) +
    geom_hline(aes(yintercept=serocut),linetype="dashed") +
    geom_line(data=filter(dl2plot,sid==iid),aes(x=age,y=logmfi,color=sero_chng)) +
    scale_color_manual(values=c(corange,cteal,cgrey))+
    xlab("Child Age (Years)") +
    ylab("Luminex Response (MFI-bg)") +
    coord_cartesian(ylim=c(0, 4.5))+
    scale_y_continuous(breaks=0:4,labels=log10labs) +
    scale_x_continuous(limits=c(0,10),breaks=seq(0,10,by=2)) +
    theme_minimal() +
    theme(
      legend.position="none"
    )
  
  p

  ggsave(filename=here("figs",paste("haiti-spaghetti-",iid,".pdf",sep="")),plot = p,device=cairo_pdf,width=5,height=14)

}
## Warning: Removed 19 rows containing missing values (geom_path).

## Warning: Removed 19 rows containing missing values (geom_path).

## Warning: Removed 19 rows containing missing values (geom_path).

## Warning: Removed 19 rows containing missing values (geom_path).

## Warning: Removed 19 rows containing missing values (geom_path).

## Warning: Removed 19 rows containing missing values (geom_path).

## Warning: Removed 19 rows containing missing values (geom_path).

## Warning: Removed 19 rows containing missing values (geom_path).

## Warning: Removed 19 rows containing missing values (geom_path).

## Warning: Removed 19 rows containing missing values (geom_path).

## Warning: Removed 19 rows containing missing values (geom_path).

## Warning: Removed 19 rows containing missing values (geom_path).

## Warning: Removed 19 rows containing missing values (geom_path).

Method agreement for identifying seroconversion

Compare the agreement in the identification of seroconversions based on two approaches: crossing the seropositivity threshold, and a 4-fold increase in MFI. Compare the approaches separately for the primary boosting event and subsequent boosting

#----------------------------------
# Create a factor that summarizes 
# seroconversion status based 
# on seropositivity cutoff, with 4 categories:
# seroreversion, no change, 1st seroconversion, 
# 2nd or 3rd seroconversion
#----------------------------------
dl3 <- dl2 %>%
  mutate(seroinf=cut(seroin,breaks=c(-1,0,1,5),
                     labels=c("No change", 
                              "Primary seroconversion", 
                              "Secondary seroconversion")),
         serostat = seroinf
  )

dl3$serostat = factor(dl3$serostat,levels=c("Seroreversion",levels(dl3$seroinf))) 
dl3$serostat[dl3$seror==1] <- "Seroreversion"


#----------------------------------
# Create a factor that summarizes 
# seroconversion status based on a 4-fold increase in MFI
# with 4 categories:
# seroreversion, no change, 1st seroconversion, 
# 2nd to 4th seroconversion
#----------------------------------
dl3 <- dl3 %>%
  mutate(seroinf4fold=cut(seroin4fold,breaks=c(-1,0,1,5),
                     labels=c("No change", 
                              "Primary boosting", 
                              "Secondary boosting")),
         serostat4fold = seroinf4fold
  )

dl3$serostat4fold = factor(dl3$serostat4fold,levels=c("Seroreversion",levels(dl3$seroinf4fold))) 
dl3$serostat4fold[dl3$seror4fold==1] <- "Seroreversion"

Compare classification of incident seroconversions

dclass <- foreach(ab=levels(dl3$antigenf),.combine=rbind) %do% {
  dcompi <- dl3 %>% filter(antigenf==ab)
  tabi <- as.vector(table(dcompi$seroi,dcompi$seroi4fold))
  names(tabi) <- c("sp04f0","sp14f0","sp04f1","sp14f1")
  kappai <- psych::cohen.kappa(table(dcompi$seroi,dcompi$seroi4fold))$kappa
  tabi <- data.frame(t(tabi))
  tabi <- tabi %>%
    mutate(Nobs=sp04f0+sp14f0+sp04f1+sp14f1,
           agreement=(sp04f0+sp14f1)/Nobs,
           kappa=kappai,
           antigenf=ab) %>%
    select(antigenf,Nobs,everything())
  tabi

}


knitr::kable(dclass,digits=3,caption="Classification agreement of seroconversion by based on seropositivity cutoffs and a 4-fold increase in MFI",
             col.names = c("Antigen","N", "Seropos 0\n4-fold incr. 0","Seropos 1\n4-fold incr. 0","Seropos 0\n4-fold incr. 1","Seropos 1\n4-fold incr. 1","Agreement","Kappa")) %>%
  kable_styling(bootstrap_options = "striped",full_width = TRUE)
Classification agreement of seroconversion by based on seropositivity cutoffs and a 4-fold increase in MFI
Antigen N Seropos 0 4-fold incr. 0 Seropos 1 4-fold incr. 0 Seropos 0 4-fold incr. 1 Seropos 1 4-fold incr. 1 Agreement Kappa
Giardia VSP-3 629 486 32 30 81 0.901 0.663
Giardia VSP-5 629 482 27 37 83 0.898 0.660
Cryptosporidium Cp17 629 441 12 106 70 0.812 0.444
Cryptosporidium Cp23 629 455 10 77 87 0.862 0.587
E. histolytica LecA 629 499 11 33 86 0.930 0.755
Salmonella LPS group B 629 479 19 66 65 0.865 0.528
Salmonella LPS group D 629 504 27 36 62 0.900 0.604
ETEC LT β subunit 629 596 0 22 11 0.965 0.487
Norovirus GI.4 629 502 11 47 69 0.908 0.652
Norovirus GII.4.NO 629 507 13 55 54 0.892 0.555

Distribution of change in MFI

Distribution of changes in MFI by seroconversion status

dl3plot <- dl3
                          
# calculate Ns in each seroconversion category to print in the figure
epiNs <- dl3plot %>%
  group_by(pathogen,antigenf,serostat4fold) %>%
  summarize(n=n()) %>%
  filter(!is.na(serostat4fold)) %>%
  mutate(nlab=paste("(n=",n,")",sep=""))


# custom color blind color palette is in the preamble chunck
pcols <- c(cred,corange,cgreen,cteal,cblue,cmagent)
# pcols <- c(corange,cred,cmagent,cblue,cteal,cgreen)

set.seed(123)
diffmfi_byepisode <- ggplot(data=filter(dl3plot,!is.na(serostat4fold)),aes(x=serostat4fold,y=logmfi_dlag,fill=pathogen,color=pathogen)) +
  facet_wrap(~antigenf,nrow=6,ncol=4)+ 
  geom_jitter(position = position_jitter(width = 0.2), alpha = 0.2,pch=19,size=0.5) +
  geom_boxplot(notch=FALSE,outlier.color=NA,width=0.5,alpha=0.5,fill=NA,color="grey20")+
  geom_hline(aes(yintercept = log10(4)),lty="dashed") +
  geom_hline(aes(yintercept = -log10(4)),lty="dashed") +

  geom_hline(aes(yintercept = 0)) +
  # add Ns
  geom_text(data=epiNs,aes(x=serostat4fold,y=-3.25,label=nlab),col="gray40",size=2) +
  scale_y_continuous(expand=c(0,0),limits=c(-3.5,4.5),breaks=-3:4) +
  scale_fill_manual(values=pcols)+
  scale_color_manual(values=pcols)+
  labs(title="",x="Seroconversion status based on 4-fold change in MFI",y="Change in log10 Luminex Response (MFI-bg) between measurements") +
  theme_minimal(base_size=12)+
  theme(
    panel.grid.minor.x = element_blank(),
    axis.text.x=element_text(angle=45,hjust=1),
    legend.position="none"
  )

diffmfi_byepisode
## Warning: Removed 624 rows containing non-finite values (stat_boxplot).
## Warning: Removed 624 rows containing missing values (geom_point).

Distribution of changes in MFI by age category

# Difference in MFI values - boxplot
dl3plot2 <- dl2 %>%
  mutate(agecat=cut(age,breaks=c(0,0.99,1.99,2.99,3.99,4.99,12),labels=c("0","1","2","3","4","5+")))

# calculate Ns in each age category to print in the figure
ageNs <- dl3plot2 %>%
  group_by(pathogen,antigenf,agecat) %>%
  summarize(n=n()) %>%
  filter(!is.na(agecat)) %>%
  mutate(nlab=paste("(n=",n,")",sep=""))

# custom color blind color palette is in the preamble chunck
pcols <- c(cred,corange,cgreen,cteal,cblue,cmagent)
# pcols <- c(corange,cred,cmagent,cblue,cteal,cgreen)

diffmfi_byage <- ggplot(data=dl3plot2,aes(x=agecat,y=logmfi_dlead,fill=pathogen,color=pathogen)) +
  facet_wrap(~antigenf,nrow=6,ncol=4)+ # ,scales='free_y'
  geom_jitter(position = position_jitter(width = 0.2), alpha = 0.2,pch=19) +
  geom_boxplot(notch=FALSE,outlier.color=NA,width=0.5,alpha=0.5,fill=NA,color="grey20")+
  geom_hline(aes(yintercept = 0)) +
  geom_text(data=ageNs,aes(x=agecat,y=-3.75,label=nlab),col="gray40",size=2) +

  scale_y_continuous(expand=c(0,0),limits=c(-4,4.5),breaks=-3:4) +
  scale_fill_manual(values=pcols)+
  scale_color_manual(values=pcols)+
  labs(title="",x="Age at beginning of period (years completed)",y="Change in log10 Luminex Response (MFI-bg) between measurements") +
  theme_minimal(base_size=12)+
  theme(
    panel.grid.minor.x = element_blank(),
    legend.position="none"
  )

diffmfi_byage
## Warning: Removed 1420 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1420 rows containing missing values (geom_point).

Session Info

sessionInfo()
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] bindrcpp_0.2.2        doParallel_1.0.11     iterators_1.0.9      
##  [4] foreach_1.4.4         ROCR_1.0-7            gplots_3.0.1         
##  [7] kableExtra_0.8.0.0001 forcats_0.3.0         stringr_1.3.1        
## [10] dplyr_0.7.8           purrr_0.2.5           readr_1.1.1          
## [13] tidyr_0.8.0           tibble_2.0.1          ggplot2_3.1.0        
## [16] tidyverse_1.2.1       here_0.1             
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.0         lubridate_1.7.4    lattice_0.20-35   
##  [4] gtools_3.5.0       assertthat_0.2.0   rprojroot_1.3-2   
##  [7] digest_0.6.18      psych_1.8.4        R6_2.3.0          
## [10] cellranger_1.1.0   plyr_1.8.4         backports_1.1.2   
## [13] evaluate_0.12      highr_0.6          httr_1.4.0        
## [16] pillar_1.3.1       rlang_0.3.1        lazyeval_0.2.1    
## [19] readxl_1.1.0       rstudioapi_0.9.0   gdata_2.18.0      
## [22] rmarkdown_1.11     foreign_0.8-70     munsell_0.5.0     
## [25] broom_0.4.4        compiler_3.5.0     modelr_0.1.2      
## [28] pkgconfig_2.0.2    mnormt_1.5-5       htmltools_0.3.6   
## [31] tidyselect_0.2.5   codetools_0.2-15   viridisLite_0.3.0 
## [34] crayon_1.3.4       withr_2.1.2        bitops_1.0-6      
## [37] grid_3.5.0         nlme_3.1-137       jsonlite_1.6      
## [40] gtable_0.2.0       magrittr_1.5       scales_1.0.0      
## [43] KernSmooth_2.23-15 cli_1.0.1          stringi_1.2.2     
## [46] reshape2_1.4.3     xml2_1.2.0         tools_3.5.0       
## [49] glue_1.3.0         hms_0.4.2          yaml_2.2.0        
## [52] colorspace_1.3-2   caTools_1.17.1     rvest_0.3.2       
## [55] knitr_1.20         bindr_0.1.1        haven_1.1.1